pacman::p_load(rsample)


boot_percentile_function <- function(alpha, 
                                     boots, 
                                     statistics) {
  alpha <- alpha
  statistics <- statistics
  boots <- boots
  bootstrap_percentiles <- purrr::map(
    .x = boots,
    .f = ~ rsample::int_pctl(
      .data = .x,
      alpha = alpha, 
      statistics = statistics
    )
  )
}


# Define Variables  ####

set.seed(853147)

outcome_vars <- c("news_sharing", 
                  "news_distinguish")



# Bootstrap of Observational Models  ####


tic()
## bootstraps
bootstraps <- data_nested %>%
  ungroup() %>% 
  select(data_type, outcome, data) %>% 
  # subset the data for computational feasibility
  mutate(data = map(data,
                    ~ select(.,
                             any_of(outcome_vars),
                             any_of(observational_vars),
                             any_of(authoritarian_vars), 
                             starts_with("num_"), 
                             matches("index"), 
                             "ID", 
                             "country_id"))) %>%
  # nest data by respondent ID for clustered bootstrap resampling
  mutate(data = map(data, 
                    ~ nest(.x, data = -c("ID")))) %>% 
  #sample 5000 times over each subset of the data
  mutate(boots = map2(
    .x = data,
    .y = outcome, 
    .f = ~ rsample::bootstraps(.x,
                               times = boot_times,
                               apparent = T
    )
  )) %>% 
  # estimate model on each of the splits
  mutate(boots = map2(
    .x = boots,
    .y = outcome, 
    ~ mutate(
      .x,
      # lm observational model simple
      lm_observational_simple_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y),
        .f = ~ map(observational_vars,
                   function(observational_vars) {

                     fit_lm_observational(
                       outcome = ..2,
                       data = analysis(..1) %>% 
                         unnest(data),
                       model = "simple",
                       predictor = paste0(observational_vars),
                       tidy = TRUE
                     )
                   })),
      # lm observational model full
      lm_observational_full_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y),
        .f = ~ fit_lm_observational(
          outcome = ..2,
          data = analysis(..1) %>% 
            unnest(data),
          model = "full",
          tidy = TRUE
        )),
      # lm right-wing authoritarian model simple
      lm_authoritarian_simple_tidy = pmap(
          list(..1 = .x$splits,
               ..2 = .y), 
          .f = ~ map(alt_authoritarian_vars, 
                     function(alt_authoritarian_vars) {
                       
                       fit_lm_authoritarian(
                         outcome = ..2, 
                         data = analysis(..1) %>% 
                           unnest(data), 
                         model = "simple", 
                         predictor = paste0(alt_authoritarian_vars), 
                         tidy = TRUE
                       ) 
                     })),
      # lm right-wing authoritarian model full 
      lm_authoritarian_full_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y), 
        .f = ~ map(alt_authoritarian_vars, 
                   function(alt_authoritarian_vars) {
                     
                     fit_lm_authoritarian(
                       outcome = ..2, 
                       data = analysis(..1) %>% 
                         unnest(data), 
                       model = "full", 
                       predictor = paste0(alt_authoritarian_vars), 
                       tidy = TRUE
                     ) 
                   }))    
    )
  )) %>% 
  select(data_type, outcome, boots) %>% 
  mutate(boots = map(boots, 
                     ~ select(., c("splits", "id", matches("tidy"))))
  )



# Confidence Intervals of Observational Models  ####

cis_observational <- bootstraps %>%
  select(boots, data_type, outcome) %>%
  mutate(boots = map(.x = boots,
                     .f = ~ select(., c("splits", "id", matches("observational"))))) %>%
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_observational = map(boots,
                                ~ unnest_wider(.,
                                               col = lm_observational_simple_tidy) %>%
                                  select(-splits, -id, -matches("full")))) %>%
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots,
                      .y = lm_observational,
                      .f = ~ bind_cols(.x, .y) %>%
                        # deselect nested list of tibbles
                        select(., -lm_observational_simple_tidy))) %>%
  # deselect helper column with wide tibbles
  select(-lm_observational) %>%
  # 90 CI
  mutate(boot_pctl_90_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 95 CI
  mutate(boot_pctl_95_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 99 CI
  mutate(boot_pctl_99_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 90 CI full
  mutate(boot_pctl_90_full = boot_percentile_function(
    alpha = 0.10,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  # 95 CI full
  mutate(boot_pctl_95_full = boot_percentile_function(
    alpha = 0.05,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  # 99 CI full
  mutate(boot_pctl_99_full = boot_percentile_function(
    alpha = 0.01,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  select(data_type, outcome, starts_with("boot_pctl"))


cis_observational <- bind_rows(
  # full estimates
  cis_observational %>%
    select(data_type, outcome, ends_with("full")) %>%
    # pivot longer full estimates
    pivot_longer(cols = c(ends_with("full")),
                 names_to = c("ci_level", "model_specification"),
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>%
    unnest(boot_pctl),
  # simple estimates
  cis_observational %>%
    select(data_type, outcome, ends_with("simple")) %>%
    unnest(cols = c(ends_with("simple"))) %>%
    mutate(predictor = names(boot_pctl_90_simple)) %>%
    # pivot longer simple estimates
    pivot_longer(cols = c(ends_with("simple")),
                 names_to = c("ci_level", "model_specification"),
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>%
    unnest(boot_pctl)
)



cis_observational %>%
  write_csv(.,
            file = "data/bootstrapped_cis/cis_observational.csv")



# Confidence Intervals of Right-Wing Authoritarian Models  ####
cis_authoritarian <- bootstraps %>%
  select(boots, data_type, outcome) %>%
  mutate(boots = map(.x = boots, 
                     .f = ~ select(., c("splits", "id", matches("authoritarian"))))) %>% 
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_authoritarian_simple = map(boots, 
                                ~ unnest_wider(., 
                                               col = lm_authoritarian_simple_tidy, 
                                               names_repair = ~paste0(.x, "_simple")) %>% 
                                  select(matches(paste0(alt_authoritarian_vars, collapse = "|"))) 
                                )) %>% 
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots, 
                      .y = lm_authoritarian_simple, 
                      .f = ~ bind_cols(.x, .y) %>% 
                        # deselect nested list of tibbles
                        select(., -lm_authoritarian_simple_tidy))) %>% 
  # deselect helper column with wide tibbles
  select(-lm_authoritarian_simple) %>%
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_authoritarian_full = map(boots, 
                                       ~ unnest_wider(., 
                                                      col = lm_authoritarian_full_tidy, 
                                                      names_repair = ~paste0(.x, "_full")) %>% 
                                         select(matches(paste0(alt_authoritarian_vars, collapse = "|"))) 
  )) %>% 
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots, 
                      .y = lm_authoritarian_full, 
                      .f = ~ bind_cols(.x, .y) %>% 
                        # deselect nested list of tibbles
                        select(., -lm_authoritarian_full_tidy))) %>% 
  # deselect helper column with wide tibbles
  select(-lm_authoritarian_full) %>% 
  # 90 CI 
  mutate(boot_pctl_90_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    ))) %>%   
  # 95 CI
  mutate(boot_pctl_95_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    ))) %>% 
  # 99 CI 
  mutate(boot_pctl_99_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    )))  %>% 
  # 90 CI 
  mutate(boot_pctl_90_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    ))) %>%   
  # 95 CI
  mutate(boot_pctl_95_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    ))) %>% 
  # 99 CI 
  mutate(boot_pctl_99_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    )))   
  

cis_authoritarian <- bind_rows(
  # simple estimates
  cis_authoritarian %>% 
    select(data_type, outcome, ends_with("simple")) %>% 
    unnest(cols = c(ends_with("simple"))) %>% 
    mutate(alt_authoritarian_var = names(boot_pctl_90_simple)) %>%  
    # pivot longer simple estimates
    pivot_longer(cols = c(ends_with("simple")), 
                 names_to = c("ci_level", "model_specification"), 
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>% 
    unnest(boot_pctl), 
  # full estimates
  cis_authoritarian %>% 
    select(data_type, outcome, ends_with("full")) %>% 
    unnest(cols = c(ends_with("full"))) %>% 
    mutate(alt_authoritarian_var = names(boot_pctl_90_full)) %>%  
    # pivot longer full estimates
    pivot_longer(cols = c(ends_with("full")), 
                 names_to = c("ci_level", "model_specification"), 
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>% 
    unnest(boot_pctl)
  )

cis_authoritarian %>% 
  write_csv(.,
            file = "data/bootstrapped_cis/cis_alt_authoritarian.csv")







# By Country: Bootstrap of Observational Models  ####

rm(bootstraps, 
   cis_observational, 
   cis_authoritarian)



## bootstraps
bootstraps <- data_nested_by_country %>%
  select(country, data_type, outcome, data) %>% 
  ungroup() %>% 
  # subset the data for computational feasibility
  mutate(data = map(data,
                    ~ select(.,
                             any_of(outcome_vars),
                             any_of(observational_vars),
                             any_of(authoritarian_vars),
                             starts_with("num_"), 
                             matches("index"), 
                             "country_id", 
                             "ID"))) %>%
  # nest data by respondent ID for clustered bootstrap resampling
  mutate(data = map(data, 
                    ~ nest(.x, data = -c("ID")))) %>% 
  # sample 5000 times over each subset of the data
  mutate(boots = map2(
    .x = data,
    .y = outcome, 
    .f = ~ rsample::bootstraps(.x,
                               times = boot_times,
                               apparent = T
    )
  )) %>% 
  # estimate model on each of the splits
  mutate(boots = map2(
    .x = boots,
    .y = outcome, 
    ~ mutate(
      .x,
      # lm observational model simple
      lm_observational_simple_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y),
        .f = ~ map(observational_vars,
                   function(observational_vars) {

                     fit_lm_observational(
                       outcome = ..2,
                       data = analysis(..1) %>% 
                         unnest(data),
                       model = "simple",
                       predictor = paste0(observational_vars),
                       tidy = TRUE,
                       by_country = TRUE
                     )
                   })),
      # lm observational model full
      lm_observational_full_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y),
        .f = ~ fit_lm_observational(
          outcome = ..2,
          data = analysis(..1) %>% 
            unnest(data),
          model = "full",
          tidy = TRUE,
          by_country = TRUE
        )),
      # lm authoritarian model simple
      lm_authoritarian_simple_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y), 
        .f = ~ map(alt_authoritarian_vars, 
                   function(alt_authoritarian_vars) {
                     
                     fit_lm_authoritarian(
                       outcome = ..2, 
                       data = analysis(..1) %>% 
                         unnest(data), 
                       model = "simple", 
                       predictor = paste0(alt_authoritarian_vars), 
                       tidy = TRUE, 
                       by_country = TRUE
                     ) 
                   })),
      # lm authoritarian model full 
      lm_authoritarian_full_tidy = pmap(
        list(..1 = .x$splits,
             ..2 = .y), 
        .f = ~ map(alt_authoritarian_vars, 
                   function(alt_authoritarian_vars) {
                     
                     fit_lm_authoritarian(
                       outcome = ..2, 
                       data = analysis(..1) %>% 
                         unnest(data), 
                       model = "full", 
                       predictor = paste0(alt_authoritarian_vars), 
                       tidy = TRUE, 
                       by_country = TRUE
                     ) 
                   }))      
    )
  )) %>% 
  select(country, data_type, outcome, boots) %>% 
  mutate(boots = map(boots, 
                     ~ select(., c("splits", "id", matches("tidy"))))
  )


# By Country: Confidence Intervals of Observational Models ####

cis_observational_by_country <- bootstraps %>%
  select(country, boots, data_type, outcome) %>%
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_observational = map(boots,
                                ~ unnest_wider(.,
                                               col = lm_observational_simple_tidy) %>%
                                  select(-splits, -id, -matches("full")))) %>%
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots,
                      .y = lm_observational,
                      .f = ~ bind_cols(.x, .y) %>%
                        # deselect nested list of tibbles
                        select(., -lm_observational_simple_tidy))) %>%
  # deselect helper column with wide tibbles
  select(-lm_observational) %>%
  # 90 CI
  mutate(boot_pctl_90_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 95 CI
  mutate(boot_pctl_95_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 99 CI
  mutate(boot_pctl_99_simple =  map(
    .x = boots,
    ~ map(observational_vars,
          function(observational_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,
              statistics = paste0(observational_vars))
          }
    ))) %>%
  # 90 CI full
  mutate(boot_pctl_90_full = boot_percentile_function(
    alpha = 0.10,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  # 95 CI full
  mutate(boot_pctl_95_full = boot_percentile_function(
    alpha = 0.05,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  # 99 CI full
  mutate(boot_pctl_99_full = boot_percentile_function(
    alpha = 0.01,
    boots = boots,
    statistics =  "lm_observational_full_tidy")
  ) %>%
  select(country, data_type, outcome, starts_with("boot_pctl"))




cis_observational_by_country <- bind_rows(
  # full estimates
  cis_observational_by_country %>%
    select(country, data_type, outcome, ends_with("full")) %>%
    # pivot longer full estimates
    pivot_longer(cols = c(ends_with("full")),
                 names_to = c("ci_level", "model_specification"),
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>%
    unnest(boot_pctl),
  # simple estimates
  cis_observational_by_country %>%
    select(country, data_type, outcome, ends_with("simple")) %>%
    unnest(cols = c(ends_with("simple"))) %>%
    mutate(predictor = names(boot_pctl_90_simple)) %>%
    # pivot longer simple estimates
    pivot_longer(cols = c(ends_with("simple")),
                 names_to = c("ci_level", "model_specification"),
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>%
    unnest(boot_pctl)
)


cis_observational_by_country %>%
  write_csv(.,
            file = "data/bootstrapped_cis/cis_observational_by_country.csv")



# By Country: Confidence Intervals of Authoritarian Models  ####
cis_authoritarian_by_country <- bootstraps %>%
  select(boots, data_type, outcome) %>%
  mutate(boots = map(.x = boots, 
                     .f = ~ select(., c("splits", "id", matches("authoritarian"))))) %>% 
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_authoritarian_simple = map(boots, 
                                       ~ unnest_wider(., 
                                                      col = lm_authoritarian_simple_tidy, 
                                                      names_repair = ~paste0(.x, "_simple")) %>% 
                                         select(matches(paste0(alt_authoritarian_vars, collapse = "|"))) 
  )) %>% 
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots, 
                      .y = lm_authoritarian_simple, 
                      .f = ~ bind_cols(.x, .y) %>% 
                        # deselect nested list of tibbles
                        select(., -lm_authoritarian_simple_tidy))) %>% 
  # deselect helper column with wide tibbles
  select(-lm_authoritarian_simple) %>%
  # create a column that stores the wide tibbles of the nested list of tibbles
  # preserves the rset structure in boots which is necessary for pctl_int
  mutate(lm_authoritarian_full = map(boots, 
                                     ~ unnest_wider(., 
                                                    col = lm_authoritarian_full_tidy, 
                                                    names_repair = ~paste0(.x, "_full")) %>% 
                                       select(matches(paste0(alt_authoritarian_vars, collapse = "|"))) 
  )) %>% 
  # add the wide tibbles back to the rset object (boots)
  mutate(boots = map2(.x = boots, 
                      .y = lm_authoritarian_full, 
                      .f = ~ bind_cols(.x, .y) %>% 
                        # deselect nested list of tibbles
                        select(., -lm_authoritarian_full_tidy))) %>% 
  # deselect helper column with wide tibbles
  select(-lm_authoritarian_full) %>% 
  # 90 CI 
  mutate(boot_pctl_90_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    ))) %>%   
  # 95 CI
  mutate(boot_pctl_95_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    ))) %>% 
  # 99 CI 
  mutate(boot_pctl_99_simple =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,  
              statistics = paste0(alt_authoritarian_vars, "_simple"))
          }
    )))  %>% 
  # 90 CI 
  mutate(boot_pctl_90_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.10,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    ))) %>%   
  # 95 CI
  mutate(boot_pctl_95_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.05,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    ))) %>% 
  # 99 CI 
  mutate(boot_pctl_99_full =  map(
    .x = boots, 
    ~ map(alt_authoritarian_vars, 
          function(alt_authoritarian_vars) {
            rsample::int_pctl(
              .data = .x,
              alpha = 0.01,  
              statistics = paste0(alt_authoritarian_vars, "_full"))
          }
    )))   


cis_authoritarian_by_country <- bind_rows(
  # simple estimates
  cis_authoritarian_by_country %>% 
    select(data_type, outcome, ends_with("simple")) %>% 
    unnest(cols = c(ends_with("simple"))) %>% 
    mutate(alt_authoritarian_var = names(boot_pctl_90_simple)) %>%  
    # pivot longer simple estimates
    pivot_longer(cols = c(ends_with("simple")), 
                 names_to = c("ci_level", "model_specification"), 
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>% 
    unnest(boot_pctl), 
  # full estimates
  cis_authoritarian_by_country %>% 
    select(data_type, outcome, ends_with("full")) %>% 
    unnest(cols = c(ends_with("full"))) %>% 
    mutate(alt_authoritarian_var = names(boot_pctl_90_full)) %>%  
    # pivot longer full estimates
    pivot_longer(cols = c(ends_with("full")), 
                 names_to = c("ci_level", "model_specification"), 
                 names_pattern = "boot_pctl_(.*)_(.*)",
                 values_to = "boot_pctl") %>% 
    unnest(boot_pctl)
)

cis_authoritarian_by_country %>% 
  write_csv(.,
            file = "data/bootstrapped_cis/cis_alt_authoritarian_by_country.csv")


toc()


rm(bootstraps, 
   cis_observational_by_country, 
   cis_authoritarian_by_country)